{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Graph Deep Learning for NLP\n",
    "\n",
    "This notebook demonstrates implementations of text summarization using the TextRank algorithm and of an AMR-to-Text summarizer using Graph Neural Networks."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. TextRank Implementation\n",
    "\n",
    "First, we'll implement the TextRank algorithm for extractive text summarization."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import required libraries\n",
    "import networkx as nx\n",
    "import numpy as np\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def textrank(sentences, top_n=2):\n",
    "    \"\"\"Rank sentences with TextRank and return the highest-scoring ones.\n",
    "\n",
    "    Builds a TF-IDF vector per sentence, connects the sentences with edges\n",
    "    weighted by cosine similarity, and scores them by running PageRank\n",
    "    over that graph.\n",
    "\n",
    "    Args:\n",
    "        sentences (list): List of sentences (strings)\n",
    "        top_n (int): Maximum number of sentences to return\n",
    "\n",
    "    Returns:\n",
    "        list: Up to top_n sentences, highest score first. Sentences with\n",
    "            equal scores keep their original relative order. Empty when\n",
    "            sentences is empty or top_n is not positive.\n",
    "    \"\"\"\n",
    "    sentences = list(sentences)\n",
    "\n",
    "    # Guard the degenerate cases up front: TfidfVectorizer raises ValueError\n",
    "    # when given no documents, and a negative top_n would otherwise slice\n",
    "    # from the wrong end of the ranking.\n",
    "    if not sentences or (top_n is not None and top_n <= 0):\n",
    "        return []\n",
    "\n",
    "    # Create TF-IDF matrix and pairwise sentence similarities\n",
    "    tfidf = TfidfVectorizer().fit_transform(sentences)\n",
    "    similarity_matrix = cosine_similarity(tfidf)\n",
    "\n",
    "    # Create graph and compute pagerank\n",
    "    graph = nx.from_numpy_array(similarity_matrix)\n",
    "    scores = nx.pagerank(graph)\n",
    "\n",
    "    # Sort sentence indices by score only; sorted() is stable, so ties keep\n",
    "    # document order instead of falling back to comparing sentence text.\n",
    "    ranked = sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)\n",
    "    return [sentences[i] for i in ranked[:top_n]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test TextRank Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original Text:\n",
      " \n",
      "Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence concerned with the interactions between computers and human language.\n",
      "The ultimate objective of NLP is to read, decipher, understand, and make sense of human languages in a valuable way.\n",
      "NLP is used in many applications, including machine translation, speech recognition, and chatbots.\n",
      "\n",
      "\n",
      "Summary:\n",
      " The ultimate objective of NLP is to read, decipher, understand, and make sense of human languages in a valuable way Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence concerned with the interactions between computers and human language\n"
     ]
    }
   ],
   "source": [
    "# Sample text for testing\n",
    "text = \"\"\"\n",
    "Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence concerned with the interactions between computers and human language.\n",
    "The ultimate objective of NLP is to read, decipher, understand, and make sense of human languages in a valuable way.\n",
    "NLP is used in many applications, including machine translation, speech recognition, and chatbots.\n",
    "\"\"\"\n",
    "\n",
    "# Preprocess: split on periods and drop the empty fragments\n",
    "sentences = [s.strip() for s in text.strip().split('.') if s.strip()]\n",
    "\n",
    "summary = textrank(sentences)\n",
    "\n",
    "# split('.') removed the sentence terminators, so put a period back after\n",
    "# each selected sentence; otherwise they run together when printed.\n",
    "summary_text = ' '.join(f\"{s}.\" for s in summary)\n",
    "\n",
    "print(\"Original Text:\\n\", text)\n",
    "print(\"\\nSummary:\\n\", summary_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. AMR-to-Text Summarizer Implementation\n",
    "\n",
    "Now we'll implement an Abstract Meaning Representation (AMR)-to-text summarizer using Graph Neural Networks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "from torch_geometric.nn import GCNConv\n",
    "from torch_geometric.data import Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AMRToTextSummarizer(nn.Module):\n",
    "    \"\"\"Graph-convolution encoder followed by a GRU and a linear projection.\n",
    "\n",
    "    Node features pass through one GCNConv layer with ReLU. The resulting\n",
    "    node embeddings are fed to a GRU as a single batch-of-one sequence, and\n",
    "    every GRU output is projected to output_dim values.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim, hidden_dim, output_dim):\n",
    "        \"\"\"Create the GCN, GRU and projection layers.\n",
    "\n",
    "        Args:\n",
    "            input_dim (int): Size of each input node feature vector\n",
    "            hidden_dim (int): Width of the GCN output and of the GRU state\n",
    "            output_dim (int): Size of the final projection\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "        self.graph_conv = GCNConv(input_dim, hidden_dim)\n",
    "        self.gru = nn.GRU(hidden_dim, hidden_dim, batch_first=True)\n",
    "        self.fc = nn.Linear(hidden_dim, output_dim)\n",
    "\n",
    "    def forward(self, data):\n",
    "        \"\"\"Run the encoder and decoder on one graph.\n",
    "\n",
    "        Args:\n",
    "            data: Object exposing x (node features) and edge_index\n",
    "\n",
    "        Returns:\n",
    "            Output of the final linear layer, with a leading batch\n",
    "            dimension of 1.\n",
    "        \"\"\"\n",
    "        # Encode: graph convolution followed by ReLU\n",
    "        node_embeddings = F.relu(self.graph_conv(data.x, data.edge_index))\n",
    "\n",
    "        # Decode: prepend a batch dimension so the GRU (batch_first=True)\n",
    "        # sees the embeddings as one sequence\n",
    "        node_sequence = node_embeddings.unsqueeze(0)\n",
    "        gru_outputs, _ = self.gru(node_sequence)\n",
    "        return self.fc(gru_outputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test AMR-to-Text Summarizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Summary logits shape: torch.Size([1, 3, 10000])\n",
      "\n",
      "Model Architecture:\n",
      "AMRToTextSummarizer(\n",
      "  (graph_conv): GCNConv(100, 256)\n",
      "  (gru): GRU(256, 256, batch_first=True)\n",
      "  (fc): Linear(in_features=256, out_features=10000, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# Seed the RNG so the random node features and the initial model weights\n",
    "# are the same on every Restart & Run All\n",
    "torch.manual_seed(42)\n",
    "\n",
    "# Set model parameters\n",
    "input_dim = 100  # Dimension of input node features\n",
    "hidden_dim = 256\n",
    "output_dim = 10000  # Vocabulary size\n",
    "\n",
    "# Create sample data: three nodes with edges 0->1, 1->2, 2->0\n",
    "edge_index = torch.tensor([[0, 1, 2], [1, 2, 0]], dtype=torch.long)\n",
    "x = torch.randn(3, input_dim)\n",
    "data = Data(x=x, edge_index=edge_index)\n",
    "\n",
    "# Initialize and test model\n",
    "model = AMRToTextSummarizer(input_dim, hidden_dim, output_dim)\n",
    "summary_logits = model(data)\n",
    "\n",
    "# Sanity check: batch of 1, one row per node, output_dim logits per row\n",
    "assert summary_logits.shape == (1, x.size(0), output_dim)\n",
    "print(\"Summary logits shape:\", summary_logits.shape)\n",
    "\n",
    "# Print model architecture\n",
    "print(\"\\nModel Architecture:\")\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
